4/4/24
##################
# LOAD LIBRARIES #
##################
library(tidyverse)
library(Seurat)
library(tidyseurat)
library(cowplot)
library(patchwork)
source("../bin/spatial_visualization.R")
source("../bin/plotting_functions.R")
#########
# PATHS #
#########
input_dir <- "../results/02_integrate_st_data/"
result_dir <- "../results/03_clustering_st_data/"
if( isFALSE(dir.exists(result_dir)) ) { dir.create(result_dir,recursive = TRUE) }
#############
# LODA DATA #
#############
DATA <- readRDS(paste0(input_dir,"seuratObj_integrated.RDS"))
#################
# COLOUR PALLET #
#################
clus_1.5 <- c("#F8766D","#7CAE00","#CD9600","#00A9FF","#984EA3","#C77CFF","#00BFC4","#FF7F00",
"#FFFF33","#E41A1C","#FF61CC","#4DAF4A","#A65628","#F781BF","#999999")
clus_1 <- c( "#CD9600","#7CAE00","#e0e067","#00A9FF","#377EB8","#984EA3","#E41A1C","#C77CFF",
"#00BFC4","#FF7F00","#FFFF33","#CD9600","#4DAF4A","#A65628","#F781BF","#999999")
##################################
# EVALUATE CLUSTERING RESOLUTION #
##################################
DATA <- FindNeighbors(DATA, reduction = "harmony", dims = 1:30, k.param = 15, prune.SNN = 1/15)
# Clustering with louvain (algorithm 1) or leiden (algorithm 4)
for (res in c(0.1, 0.5, 1, 1.5, 2)) {
DATA <- FindClusters(DATA, resolution = res, algorithm = 1)
}
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 6598
Number of edges: 299026
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9375
Number of communities: 2
Elapsed time: 0 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 6598
Number of edges: 299026
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8316
Number of communities: 7
Elapsed time: 0 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 6598
Number of edges: 299026
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7608
Number of communities: 11
Elapsed time: 0 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 6598
Number of edges: 299026
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7115
Number of communities: 13
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 6598
Number of edges: 299026
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.6669
Number of communities: 16
Elapsed time: 0 seconds
# each time you run clustering, the data is stored in meta data columns:
# seurat_clusters - lastest results only RNA_snn_res.XX - for each different
# resolution you test.
# dev.new(width=6.6929133858, height=3.3, noRStudioGD = TRUE)
res <- c("RNA_snn_res.1", "RNA_snn_res.1.5")
p <- map2(res, list(clus_1,clus_1.5),
~plot_clusters.fun(DATA, cluster=.x, txt_size = 10, dot_size = 0.2,
color = .y, red = "umap_harmony") + xlab("UMAP 1") + ylab("UMAP 2"))
plot_grid(ncol = 2,
plotlist = p)
# dev.new(width=6.6929133858, height=14, noRStudioGD = TRUE)
plots <- DATA %>%
mutate(group = orig.ident) %>%
nest(., data = -group) %>%
mutate( "res_1" = pmap(.,
~plot_spatial.fun(..2, sampleid=..1, geneid="RNA_snn_res.1",
point_size = 0.8, zoom="zoom", colors = clus_1))) %>%
mutate( "res_2" = pmap(.,
~plot_spatial.fun(..2, sampleid=..1, geneid="RNA_snn_res.1.5",
point_size = 0.8, zoom="zoom", colors = clus_1.5)))
legend_1 <- get_legend(plots$res_1[[2]] + theme(legend.position="right"))
legend_2 <- get_legend(plots$res_2[[1]] + theme(legend.position="right"))
legend <- plot_grid( legend_1, legend_2, ncol = 1)
combined <- wrap_plots(plotlist=c(plots$res_1, plots$res_2), nrow = 8, byrow = F) & theme(legend.position="none")
combined <- plot_grid( combined, legend, ncol = 2, rel_widths = c(1, .3))
combined
DATA <- DATA %>%
rename(Clusters="RNA_snn_res.1") %>%
SetIdent(., value = "Clusters") %>%
select(-any_of(contains(c("RNA_snn_res","seurat_clusters"))))
# dev.new(width=3.3, height=3.3, noRStudioGD = TRUE)
p[[1]] + ggtitle("")
DATA_sub <- DATA %>%
mutate(gr = .$groups) %>%
mutate(ID = .$orig.ident) %>%
nest(., data=-c(gr, orig.ident)) %>%
mutate(epi = map(data, ~filter(.x, !(sp_annot == "SubMuc"))),
subMuc = map(data, ~filter(.x, sp_annot == "SubMuc"))) %>%
mutate(across(c("epi", "subMuc"), ~map(., ~table(.x$Clusters)), .names = "{.col}_n_before")) %>%
mutate(across(contains("_n_"), ~set_names(.x, paste0(.data[["gr"]],"_",.data[["orig.ident"]]))))
table(DATA$Clusters)
bind_cols(DATA_sub$epi_n_before, "Clus" = paste0("**",names(table(DATA$Clusters)),"**")) %>%
rowwise() %>%
mutate(DMPA_sum = sum(c_across(starts_with("DMPA_"))),
ctrl_sum = sum(c_across(starts_with("ctrl_")))) %>%
select(sort(colnames(.)[1:8]), everything()) %>%
knitr::kable(., caption = "Distribution of epithelial spots per cluster per subject")
bind_cols(DATA_sub$subMuc_n_before, "Clus" = paste0("**",names(table(DATA$Clusters)),"**")) %>%
rowwise() %>%
mutate(DMPA_sum = sum(c_across(starts_with("DMPA_"))),
ctrl_sum = sum(c_across(starts_with("ctrl_")))) %>%
select(sort(colnames(.)[1:8]), everything()) %>%
knitr::kable(., caption = "Distribution of submucosal spots per cluster per subject")
0 1 2 3 4 5 6 7 8 9 10
1148 884 747 656 646 558 511 507 471 432 38
| ctrl_P031 | ctrl_P080 | ctrl_P105 | ctrl_P118 | DMPA_P097 | DMPA_P107 | DMPA_P108 | DMPA_P114 | Clus | DMPA_sum | ctrl_sum |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 |
| 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 2 | 1 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 |
| 3 | 0 | 3 | 0 | 0 | 0 | 0 | 1 | 3 | 1 | 6 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 |
| 79 | 67 | 47 | 77 | 67 | 31 | 37 | 67 | 5 | 202 | 270 |
| 60 | 66 | 43 | 69 | 30 | 76 | 29 | 112 | 6 | 247 | 238 |
| 89 | 67 | 34 | 77 | 49 | 51 | 38 | 61 | 7 | 199 | 267 |
| 12 | 9 | 1 | 6 | 4 | 7 | 3 | 1 | 8 | 15 | 28 |
| 92 | 69 | 33 | 53 | 51 | 51 | 26 | 56 | 9 | 184 | 247 |
| 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 10 | 1 | 1 |
Distribution of epithelial spots per cluster per subject
| ctrl_P031 | ctrl_P080 | ctrl_P105 | ctrl_P118 | DMPA_P097 | DMPA_P107 | DMPA_P108 | DMPA_P114 | Clus | DMPA_sum | ctrl_sum |
|---|---|---|---|---|---|---|---|---|---|---|
| 150 | 208 | 124 | 201 | 227 | 20 | 34 | 182 | 0 | 463 | 683 |
| 131 | 192 | 118 | 121 | 182 | 10 | 36 | 91 | 1 | 319 | 562 |
| 37 | 155 | 72 | 91 | 231 | 35 | 9 | 117 | 2 | 392 | 355 |
| 46 | 100 | 76 | 130 | 141 | 25 | 43 | 88 | 3 | 297 | 352 |
| 24 | 73 | 76 | 192 | 123 | 37 | 21 | 100 | 4 | 281 | 365 |
| 1 | 5 | 8 | 20 | 41 | 1 | 1 | 9 | 5 | 52 | 34 |
| 0 | 3 | 0 | 0 | 4 | 19 | 0 | 0 | 6 | 23 | 3 |
| 1 | 33 | 0 | 2 | 4 | 0 | 0 | 1 | 7 | 5 | 36 |
| 24 | 46 | 59 | 85 | 102 | 14 | 20 | 78 | 8 | 214 | 214 |
| 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | 1 | 0 |
| 1 | 1 | 0 | 0 | 5 | 29 | 0 | 0 | 10 | 34 | 2 |
Distribution of submucosal spots per cluster per subject
# Horizontal (aspect = width/height)
# dev.new(width=6.6929133858, height=3.5, noRStudioGD = TRUE)
############################
# PLOT FACET WRAP CLUSTERS #
############################
(p <- plot_st_meta.fun( DATA,
feat = "Clusters",
zoom = "zoom",
colors = clus_1,
alpha = .9,
#annot_col = "#dbd9d9",
annot_line = .1,
img_alpha = 0,
point_size = .5))
###################
# ADD ANNOTATION #
##################
ord1 <- c("Sup_1","Sup_2","Basal_2","Basal_1","8","3","4","0","1","2","10")
ord2 <- c("6", "9", "7", "5","8","3","4","0","1","2","10")
epi_layers <- set_names(ord1, ord2)
DATA <- DATA %>%
mutate(layers = factor(epi_layers[as.character(.$Clusters)], levels = ord1))
The log2 count can work as a proxy for cell density. Here we see the distribution of counts per cluster
clus_col <- c("#E41A1C","#FF7F00", "#C77CFF","#984EA3","#00BFC4", "#00A9FF","#377EB8","#CD9600","#7CAE00", "#e0e067","#FFFF33","#FF61CC","#F781BF", "#999999")
clus_lvl <- c("6", "9", "7", "5","8","3","4","0","1","2","10")
DATA %>%
mutate("Count (log2)" = log2(.$nCount_RNA)) %>%
mutate(Clusters = factor(.$Clusters, levels = clus_lvl)) %>%
ggplot(., aes(x=Clusters, y=`Count (log2)`, fill=Clusters, color=Clusters)) +
geom_violin() + theme_light() +
scale_fill_manual(values = clus_col, aesthetics = c("fill","colour"))
##################################
# SAVE INTERMEDIATE SEURAT OJECT #
##################################
saveRDS(DATA, paste0(result_dir,"seuratObj_clustered.RDS"))
# DATA <- readRDS(paste0(result_dir,"seuratObj_clustered.RDS"))
sessionInfo()
R version 4.3.3 (2024-02-29)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Monterey 12.7.3
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/Stockholm
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] RColorBrewer_1.1-3 patchwork_1.2.0 cowplot_1.1.3 tidyseurat_0.8.0
[5] ttservice_0.4.0 SeuratObject_5.0.1 Seurat_4.3.0 lubridate_1.9.3
[9] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2
[13] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.0
[17] tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] deldir_2.0-2 pbapply_1.7-2 gridExtra_2.3
[4] rlang_1.1.3 magrittr_2.0.3 RcppAnnoy_0.0.22
[7] spatstat.geom_3.2-8 matrixStats_1.2.0 ggridges_0.5.6
[10] compiler_4.3.3 png_0.1-8 vctrs_0.6.5
[13] reshape2_1.4.4 pkgconfig_2.0.3 fastmap_1.1.1
[16] ellipsis_0.3.2 labeling_0.4.3 utf8_1.2.4
[19] promises_1.2.1 rmarkdown_2.25 tzdb_0.4.0
[22] xfun_0.42 jsonlite_1.8.8 goftest_1.2-3
[25] later_1.3.2 spatstat.utils_3.0-4 irlba_2.3.5.1
[28] parallel_4.3.3 cluster_2.1.6 R6_2.5.1
[31] ica_1.0-3 spatstat.data_3.0-4 stringi_1.8.3
[34] reticulate_1.35.0 parallelly_1.37.0 lmtest_0.9-40
[37] scattermore_1.2 Rcpp_1.0.12 knitr_1.45
[40] tensor_1.5 future.apply_1.11.1 zoo_1.8-12
[43] sctransform_0.4.1 httpuv_1.6.14 Matrix_1.6-5
[46] splines_4.3.3 igraph_2.0.2 timechange_0.3.0
[49] tidyselect_1.2.0 abind_1.4-5 rstudioapi_0.15.0
[52] yaml_2.3.8 spatstat.random_3.2-2 spatstat.explore_3.2-6
[55] codetools_0.2-19 miniUI_0.1.1.1 listenv_0.9.1
[58] lattice_0.22-5 plyr_1.8.9 shiny_1.8.0
[61] withr_3.0.0 ROCR_1.0-11 evaluate_0.23
[64] Rtsne_0.17 future_1.33.1 survival_3.5-8
[67] polyclip_1.10-6 fitdistrplus_1.1-11 pillar_1.9.0
[70] KernSmooth_2.23-22 plotly_4.10.4 generics_0.1.3
[73] sp_2.1-3 hms_1.1.3 munsell_0.5.0
[76] scales_1.3.0 globals_0.16.2 xtable_1.8-4
[79] glue_1.7.0 lazyeval_0.2.2 tools_4.3.3
[82] data.table_1.15.0 RANN_2.6.1 fs_1.6.3
[85] leiden_0.4.3.1 dotCall64_1.1-1 grid_4.3.3
[88] colorspace_2.1-0 nlme_3.1-164 cli_3.6.2
[91] spatstat.sparse_3.0-3 spam_2.10-0 fansi_1.0.6
[94] viridisLite_0.4.2 uwot_0.1.16 gtable_0.3.4
[97] digest_0.6.34 progressr_0.14.0 ggrepel_0.9.5
[100] farver_2.1.1 htmlwidgets_1.6.4 htmltools_0.5.7
[103] lifecycle_1.0.4 httr_1.4.7 mime_0.12
[106] MASS_7.3-60.0.1